*This program performs a simulation to compare the uncertainty of household-level distributions of
*consumption with the uncerainty of the cross-sectional distribution of outcomes
*The program is run for the project on consumption uncertainty and precautionary saving using DNB data
*November 2018 - Dimitris Christelis, Dimitris Georgarakos, Tullio Jappelli, Maarten van Rooij

version 12.1
clear all
macro drop _all
set more off
set maxvar 10000

*ATTENTION - ATTENTION - ATTENTION 
*The following line defines the file path. Please adapt to own computer
do "c:/foldp/foldp.do"
global projf "${foldp}/DNB_survey_consumption/ConGrowth/Replication"

global stdate $S_DATE 
global sttime $S_TIME
noisily display "Begin program at ${sttime} on ${stdate}"   /* time stamp */

*log 
cap log close
log using "${projf}/Programs/Simulation/distribution_simulation.log", replace

*Setting the seed
set seed 1234

*Number of simulation runs
local nsim = 10000

*Number of households in the sample
local nobs = 3000

*Median minimum and maximum in the sample
sca lm = 1400
sca um = 1750

*Coefficient by which the household-level minima and maxima can vary from the 
*respective sample medians
sca het = 0.1
assert het>0 & het<1

*Local of distributions used (simple triangular and uniform)
local distlist simtriang uniform
local list_n: word count `distlist'

*Setting the number of observations equal to the maximum of the number of households and the number of simulation runs
local mxn = max(`nobs',`nsim')
set obs `mxn'

*Generating an ID number
qui gen double id = _n if _n<=`nobs'

*Generating two household-level coefficients using random draws. 
*These coefficients will multiply the sample median minimum and maximum to generate household heterogeneity
*over these two magnitudes
forvalues k=1/2 {
   
   local seed = 3617+`k'
   set seed `seed'
   qui gen double coeff`k' = (2*het)*uniform() + (1-het) if id<.
}


*Calculating the household-level minimum and maximum
qui gen double lobv = coeff1*lm if id<.
qui gen double upbv = coeff2*um if id<.
assert lobv<upbv if id<.

*Calculating the mid-point between the minimum and maximu
qui gen mid = (lobv+upbv)/2 if id<.

forvalues m=1/`list_n' {

   local dist: word `m' of `distlist'

   di in yellow "Distribution is `dist'"

   *Creating a variable that captures the results of the simulation results
   qui gen double sim_results = .

   *In each simulation run we have a sample of households that has a size equal to the parameter `nobs'
   *For each simulation run we draw from the distribution of each of these `nobs' households 
   *and compute the cross-sectional variance of the observed outcome
   forvalues k=1/`nsim' {
     
     
      *Generating a random number for each households
      local seed = 5678 + `k'
      set seed `seed'
      qui gen double uni = uniform() if id<.
      
      
      qui gen double outcome = . 
      
      *Draw from a simple triangular distribution
      if "`dist'"=="simtriang" {
      
         qui replace outcome = lobv + sqrt(uni*(upbv-lobv)*(mid-lobv)) if ///
	     uni>=0 & uni<(mid-lobv)/(upbv-lobv) & id<.
         qui replace outcome = upbv - sqrt((1-uni)*(upbv-lobv)*(upbv-mid)) if ///
	     uni>=(mid-lobv)/(upbv-lobv) & uni<=1 & id<.
      }
      
      *Draw from a uniform distribution
      else if "`dist'"=="uniform" {
      
         qui replace outcome = lobv+(uni*(upbv-lobv)) if id<.
      }
      
      assert outcome<. if id<.
      
      *Calculating the std deviation of the observed outcome distribution and recording it in the
      *appropriate line of the variable denoting the simulation results
      qui su outcome
      qui replace sim_results = r(sd) if _n==`k'
      drop outcome uni

   *End of loop over simulation runs
   }


   *Recording the household-level standard-deviations

   *Simple triangular distribution
   if "`dist'"=="simtriang" {

      qui gen double sd_ind = sqrt((((lobv)^2)+((upbv)^2)+(mid^2) - ///
         (upbv*lobv)-(upbv*mid)-(mid*lobv))/18) if id<.
   }
      
   *Uniform distribution
   else if "`dist'"=="uniform" {   

      qui gen double sd_ind = sqrt(((upbv-lobv)^2)/12) if id<.
   }
      
   *We then calculate the average across simulation runs of the standard deviation of the observed outcome
   *and compare it to the average of the standard deviations of the household-level distributions
   
   *Creating a matrix of results
   mat B_`dist'= J(3,4,.)
   mat colnames B_`dist' = coeff se low_ci95 high_ci95
   mat rownames B_`dist' = `dist'_sim `dist'_sample blank
   
   *Recording the mean, std. error, and CIs over the simulation draws
   qui su sim_results
   sca m_simr = r(mean)
   sca se_simr = r(sd)
   sca ci95_low = m_simr - se_simr*invttail(`nsim'-1,0.025)
   sca ci95_high = m_simr + se_simr*invttail(`nsim'-1,0.025)

   mat B_`dist'[1,1] = m_simr
   mat B_`dist'[1,2] = se_simr
   mat B_`dist'[1,3] = ci95_low
   mat B_`dist'[1,4] = ci95_high

   *Recording the mean of the household-level std deviations
   qui mean sd_ind

   mat A2 = r(table)
   mat b2 = A2["b",1]
   mat se2 = A2["se",1]
   mat ll2 = A2["ll",1]
   mat ul2 = A2["ul",1]

   forvalues l=2/2 {

   mat B_`dist'[`l',1]=b`l'[1,1]
   mat B_`dist'[`l',2]=se`l'[1,1]
   mat B_`dist'[`l',3]=ll`l'[1,1]
   mat B_`dist'[`l',4]=ul`l'[1,1]

   }

   drop sd_ind sim_results

   if `m'==1 {

      mat B=B_`dist'
   }
   else if `m'>1 {

      mat B=(B\B_`dist')
   }

   if `m'==`list_n' {

      mat list B
   }

*End of loop over distributions
}

*Saving the matrix of results
mat2txt, mat(B) saving("${projf}/Results/Simulation/Results_distribution_simulation_nobs`nobs'_nsim`nsim'.txt") replace format(%15.6f) ///      
  title("Results for simulation of household-level uncertainty, $S_DATE, $S_TIME)")      
mat drop _all

drop _all

*Time stamp
noisily display "Program started at: ${sttime} of ${stdate}"      /* beginning time stamp */
noisily display "Program finished at: $S_TIME of $S_DATE"       /*ending time stamp*/       

cap log close


